Plotting the variables for ensemble runs¶
Notes:
- enabled reading data on Baseline
Tips Baseline¶
- Use pyces env
import os,glob
import xarray as xr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import socket
#yimport pyreadr # to read .rds files
Some Useful functions¶
def print_dict_tree(d, indent=0):
    """Recursively print the key hierarchy of a dictionary as an indented tree.

    Parameters
    ----------
    d : dict
        Dictionary whose keys are printed; nested dicts are descended into.
    indent : int, optional
        Number of leading spaces for the current level (default 0).
    """
    for key, val in d.items():
        print(f"{' ' * indent}{key}")
        if isinstance(val, dict):
            # Descend one level, indenting children by four more spaces.
            print_dict_tree(val, indent + 4)
Check which machine you are on! The path names will vary accordingly¶
# Identify the host machine so filesystem paths can be selected accordingly.
machine_name = socket.gethostname()

# (hostname substring, human-readable label) pairs for the known machines.
_known_hosts = (
    ("baseline", "baseline"),
    ("or-slurm", "cades"),
    ("MAC132004", "macbook"),
)
for _tag, _label in _known_hosts:
    if _tag in machine_name:
        print(f"We are on: {_label}")
        break
else:
    print("other than baseline, cades, or LAB macbook")
We are on: baseline
Copy all the FATES parameter files from the ad_spinup run directory to the following path.
Use rsync_params.sh!
# Paths -------------------------------------------------------------------
# Host-specific roots for the FATES parameter files, the processed model
# output, and the ensemble design values.
if "baseline" in machine_name:
    path_params_head = "/gpfs/wolf2/cades/cli185/scratch/ud4/FATES_Outputs/runs/UQ/params/"
    path_processed_head = "/gpfs/wolf2/cades/cli185/scratch/ud4/FATES_Outputs/runs/"
    path_ensemble_vals = "/ccsopen/home/ud4/models/OLMT/data/lnd/clm2/paramdata/ensemble/"
elif "MAC132004" in machine_name:
    # NOTE(review): path_ensemble_vals is never defined on this branch, so
    # the read_csv cell below will fail on the macbook -- confirm local path.
    path_params_head = "/Users/ud4/FATESMDS_analysis/outputs/runs/tests_alp/2024/UQ/params/"
    path_processed_head = "/Users/ud4/FATESMDS_analysis/outputs/runs/tests_alp/2024/"

N_ensembles = 216  # ensemble members per case (6 x 6 x 6 parameter grid below)
params_list = ["fates_cnp_vmax_nh4", "fates_cnp_vmax_no3", "fates_cnp_vmax_p"]

# Path to save results (baseline filesystem).
path_save = "/gpfs/wolf2/cades/cli185/scratch/ud4/Results/EN_Vmax/"
fnames={} # {run_key: path to the processed history file of that run}
fnames_params={} # {site: {run_key: path to the FATES parameter file}}
#FACE_1PFT_AllomBlVmax_r240424_woL2FR_CONew_enVmax_ECA_processed
# Candidate case ids.  Each assignment overrides the previous one, so only
# the LAST uncommented case_id is actually used (notebook convention for
# switching cases by reordering/commenting lines).
case_id = "FACE_1PFT_AllomBlVmax_r240424_woL2FR_CONew_enVmax"
case_id = "FACE_1PFT_AllomBlVmax_r240424_woL2FR_CONew_enVmax_RD"
case_id = "FACE_1PFT_AllomBlVmax_r240424_woL2FR_CONew_enVmax_ECA"
case_id = "FACE_1PFT_AllomBlVmax_r240424_wL2FR_CONew_enVmax"
case_id = "FACE_1PFT_AllomBlVmax_r240424_wL2FR_CONew_enVmax_RD"
case_id = "FACE_1PFT_AllomBlVmax_r240424_wL2FR_CONew_enVmax_ECA"  # active case
#case_id = "FACE_1PFT_AllomBl_r240424_woL2FRf_CONew_enVmax_ECA_processed"
#case_id = "FACE_1PFT_AllomBl_r240424_woL2FRf_CONew_enVmax_RD_processed"
sites= ("US-ORN", "US-DUK")
#sites= ("US-ORN",)#, "US-DUK")

# Build, for each site, the paths of the processed history file and of the
# FATES parameter file for every ensemble member g00001..g<N_ensembles>.
for site in sites:
    fnames_params[site] = {}
    for member in range(1, int(N_ensembles) + 1):
        # C-Only runs; member ids are 1-based, zero-padded to 5 digits.
        run_key = f"{case_id}_{site}_spins_g{member:05d}"
        fnames[run_key] = f"{path_processed_head}{case_id}_processed/{run_key}.nc"
        fnames_params[site][run_key] = (
            f"{path_params_head}{case_id}_{site}_I1850ELMFATES_ad_spinup/"
            f"fates_params_{member:05d}.nc"
        )
If a parameter-file error occurs, add the case directory to case_params_copy.txt,
then run rsync_params.sh.
# Open every ensemble parameter file and collect, per site, the value of each
# parameter of interest (index 0, i.e. the single PFT) in ordered lists.
ds_params = {}
dict_params = {site: {parm: [] for parm in params_list} for site in sites}

for site in sites:
    for key in fnames_params[site]:
        #print (key)
        # decode_times=False: parameter files carry no usable time axis.
        ds_params[key] = xr.open_mfdataset(fnames_params[site][key], decode_times=False)
        # Append this member's value for every parameter of interest.
        for parm in params_list:
            #print (f"{key}: {ds_params[key][parm].values[0]} ")
            dict_params[site][parm].append(ds_params[key][parm].values[0])
import pandas as pd

# Ensemble design matrix: one row per member (indexed g00001..), one column
# per parameter.  The file is whitespace-delimited with no header row.
# FIX: pandas deprecates delim_whitespace= (FutureWarning in the log);
# sep=r'\s+' is the documented, behavior-identical replacement.
df_params = pd.read_csv(f'{path_ensemble_vals}bs_Vmax2_parm_vals',
                        names=params_list,
                        sep=r'\s+')
# Re-index rows with the zero-padded member ids used everywhere else.
df_params.index = ['g{:05d}'.format(i+1) for i in range(len(df_params))]
# Display the DataFrame
print(df_params)
# Plot the raw and log10 parameter values across ensemble members.
# FIX: the original called plt.figure() and then DataFrame.plot(figsize=...),
# which creates its OWN new figure -- leaving an orphan empty figure
# ("<Figure size 1500x500 with 0 Axes>" in the output).  Let pandas create
# the figures directly.
df_params.plot(figsize=(15,5))

# Take the logarithm base 10 of the DataFrame values: the sampled vmax
# values span several decades, so a log view is more readable.
df_log10 = np.log10(df_params)
# Plot the columns
df_log10.plot(figsize=(15,5),title="g")
plt.ylabel('log10 values')
plt.savefig(f"{path_save}/Params_en.png")
/tmp/ipykernel_1479910/2081599401.py:4: FutureWarning: The 'delim_whitespace' keyword in pd.read_csv is deprecated and will be removed in a future version. Use ``sep='\s+'`` instead
df_params = pd.read_csv(f'{path_ensemble_vals}bs_Vmax2_parm_vals',
fates_cnp_vmax_nh4 fates_cnp_vmax_no3 fates_cnp_vmax_p g00001 1.000000e-11 1.000000e-11 1.000000e-11 g00002 1.000000e-11 1.000000e-11 1.000000e-10 g00003 1.000000e-11 1.000000e-11 1.000000e-09 g00004 1.000000e-11 1.000000e-11 1.000000e-08 g00005 1.000000e-11 1.000000e-11 1.000000e-07 ... ... ... ... g00212 1.000000e-06 1.000000e-06 1.000000e-10 g00213 1.000000e-06 1.000000e-06 1.000000e-09 g00214 1.000000e-06 1.000000e-06 1.000000e-08 g00215 1.000000e-06 1.000000e-06 1.000000e-07 g00216 1.000000e-06 1.000000e-06 1.000000e-06 [216 rows x 3 columns]
<Figure size 1500x500 with 0 Axes>
# Making a dataframe that will store the sum of the first `sum_n_years`
# entries of each variable, for the sensitivity analysis below.  Also keep
# the full extracted time series per (variable, site, member).
dict_vars_data = {}
# Number of leading entries summed per series (assumes one entry per year --
# TODO confirm output frequency of the processed files).
sum_n_years = 20
dict_param_sum_var = {}
variables = ["FATES_GPP",
             "FATES_NPP"]
for var in variables:
    dict_vars_data[var] = {}
    dict_param_sum_var[var] = {}
    for site in sites:
        dict_vars_data[var][site] = {}
        # FIX: the original first assigned {} here and immediately overwrote
        # it with the DataFrame copy -- the dead store is removed.
        # Start from a copy of the design matrix; a Sum_<var> column is added.
        dict_param_sum_var[var][site] = df_params.copy(deep=True)
        for key in fnames.keys():
            if site in key:  # Storing the ORNL and Duke data separately
                member = key.split("_")[-1]  # e.g. 'g00042'
                dict_vars_data[var][site][member] = xr.open_dataset(
                    fnames[key], decode_times=False)[var]
                # Sum of the first `sum_n_years` entries of this member's series.
                sum_var = dict_vars_data[var][site][member][:sum_n_years].sum().values
                dict_param_sum_var[var][site].loc[member, f"Sum_{var}"] = sum_var
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor

#var = "FATES_GPP"
#site = "US-ORN"
# Sensitivity analysis per (variable, site): one-parameter-at-a-time linear
# fits of Sum_<var> against each vmax parameter, then a multivariate random
# forest whose feature importances are plotted and saved.
for var in variables:
    for site in sites:
        df = dict_param_sum_var[var][site].copy(deep=True)

        # Split dataset into features (parameters) and target variable (output).
        X = df[params_list]      # features: the sampled parameter values
        y = df[f"Sum_{var}"]     # target: first-N-years sum of the variable

        # Hold out 20% of the members for scoring (fixed seed for the split).
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42)

        # Fit one univariate model per parameter.
        models = {}
        for param in X.columns:
            # Change this to RandomForestRegressor() for Random Forest.
            model = LinearRegression()
            model.fit(X_train[[param]], y_train)
            models[param] = model

        # R-squared of each univariate fit on the held-out members.
        for param, model in models.items():
            score = model.score(X_test[[param]], y_test)
            print(f'R-squared for {param}: {score}')

        # Report the slope (linear) or importance (forest) of each fit.
        for param, model in models.items():
            if isinstance(model, LinearRegression):
                print(f'Coefficient for {param}: {model.coef_}')
            elif isinstance(model, RandomForestRegressor):
                print(f'Feature importance for {param}: {model.feature_importances_}')

        # Multivariate random forest over all parameters at once.
        model = RandomForestRegressor()
        model.fit(X_train, y_train)
        feature_importances = model.feature_importances_

        # Rank the parameters by importance for plotting.
        importance_df = pd.DataFrame({'Feature': X_train.columns,
                                      'Importance': feature_importances})
        importance_df = importance_df.sort_values(by='Importance', ascending=False)

        # Short case tag for titles/filenames, e.g. "r240424_wL2FR_ECA".
        tmp_case_name = "_".join([case_id.split("_")[3],
                                  case_id.split("_")[4],
                                  case_id.split("_")[-1]])

        # Plot and save the feature-importance bar chart.
        plt.figure(figsize=(10, 6))
        plt.bar(importance_df['Feature'], importance_df['Importance'])
        plt.xlabel('Feature')
        plt.ylabel('Importance')
        plt.title(f'Feature Importance {tmp_case_name}_{var}_{site}')
        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        plt.savefig(f"{path_save}/Sensivity_{tmp_case_name}_{var}_{site}.png")
        plt.show()
R-squared for fates_cnp_vmax_nh4: -2.7651834230215377 R-squared for fates_cnp_vmax_no3: -8.838260256530342 R-squared for fates_cnp_vmax_p: -0.5934420745822295 Coefficient for fates_cnp_vmax_nh4: [0.05656858] Coefficient for fates_cnp_vmax_no3: [0.15208165] Coefficient for fates_cnp_vmax_p: [0.11844593]
R-squared for fates_cnp_vmax_nh4: -0.2274752101694375 R-squared for fates_cnp_vmax_no3: -0.7592267183133523 R-squared for fates_cnp_vmax_p: -0.03970626087370066 Coefficient for fates_cnp_vmax_nh4: [0.05264537] Coefficient for fates_cnp_vmax_no3: [0.34443011] Coefficient for fates_cnp_vmax_p: [0.2759315]
R-squared for fates_cnp_vmax_nh4: -3.3111425143059137 R-squared for fates_cnp_vmax_no3: -12.052637763542027 R-squared for fates_cnp_vmax_p: -0.7393521832751062 Coefficient for fates_cnp_vmax_nh4: [0.02620966] Coefficient for fates_cnp_vmax_no3: [0.08604801] Coefficient for fates_cnp_vmax_p: [0.0709103]
R-squared for fates_cnp_vmax_nh4: -0.25209094870890003 R-squared for fates_cnp_vmax_no3: -0.8425141531184737 R-squared for fates_cnp_vmax_p: -0.04529563256863178 Coefficient for fates_cnp_vmax_nh4: [0.041552] Coefficient for fates_cnp_vmax_no3: [0.24931344] Coefficient for fates_cnp_vmax_p: [0.19788289]
#dict_param_sum_var [var] [site]
#site
# Inspect which case id is currently active (notebook convenience cell).
case_id
'FACE_1PFT_AllomBlVmax_r240424_wL2FR_CONew_enVmax_ECA'
#print (params_list)
# One figure per (variable, site): overlay the time series of all 216
# ensemble members; the marker style encodes the member's vmax_p level.
for var in variables:
    for site in sites:
        df = dict_param_sum_var[var][site].copy(deep=True)
        marker_styles = ['o', 's', '^', 'D', 'x', '*']
        plt.figure(figsize=(8, 6))
        count = 0
        for val_nh4 in df_params[params_list[0]].unique():
            for val_no3 in df_params[params_list[1]].unique():
                for k, val_p in enumerate(df_params[params_list[2]].unique()):
                    # The single ensemble member with this parameter triple.
                    match = df_params[(df_params[params_list[0]] == val_nh4) &
                                      (df_params[params_list[1]] == val_no3) &
                                      (df_params[params_list[2]] == val_p)]
                    plt.plot(dict_vars_data[var][site][match.index[0]],
                             marker=marker_styles[k],
                             label=f'{(val_nh4, val_no3, val_p)}', alpha=0.5)
                    count += 1
        plt.title(f'{tmp_case_name}_{var}_{site}')
        plt.xlabel('Index')
        plt.ylabel('Output Variable')
        #plt.legend()
        plt.grid(True)
        plt.savefig(f"{path_save}/TS_all_{tmp_case_name}_{var}_{site}.png")
        plt.show()
        # Sanity check: should print 216 (6 x 6 x 6) per variable/site.
        print(count)
216
216
216
216
print (params_list)¶
# Per-member diagnostic: one small figure for EVERY (p1, p2, p3) triple so
# individual ensemble members can be inspected (saving/showing disabled).
# NOTE(review): reconstructed from a collapsed notebook cell -- the exact
# nesting of the title/label calls is inferred; confirm against the notebook.
for var in variables:
    for site in sites:
        df = dict_param_sum_var[var][site].copy(deep=True)
        marker_styles = ['o', 's', '^', 'D', 'x', '*']
        count = 0
        for idx_p1, p1 in enumerate(df_params[params_list[0]].unique()):
            for idx_p2, p2 in enumerate(df_params[params_list[1]].unique()):
                for idx_p3, p3 in enumerate(df_params[params_list[2]].unique()):
                    plt.figure(figsize=(8, 6))
                    subset = df_params[(df_params[params_list[0]] == p1) &
                                       (df_params[params_list[1]] == p2) &
                                       (df_params[params_list[2]] == p3)]
                    plt.plot(dict_vars_data[var][site][subset.index[0]],
                             marker=marker_styles[idx_p3],
                             label=f'{(p1,p2,p3)}', alpha=0.5)
                    count += 1
                    plt.title(f'{tmp_case_name}{var}{site}')
                    plt.xlabel('Index')
                    plt.ylabel('Output Variable')
                    plt.legend()
                    plt.ylim(0, 1e-7)
                    plt.grid(True)
                    #plt.savefig(f"{path_save}/TS_{tmp_case_name}{var}{site}_{subset.index[0]}.png")
                    #plt.show()
        print(var, site, count)
# Inspect the run key of the last-selected ensemble member (e.g. 'g00216').
subset.index[0]
'g00216'
# One figure per (vmax_nh4, vmax_no3) pair, overlaying the six members that
# differ only in vmax_p; the marker style encodes the vmax_p level.
print(params_list)
df = dict_param_sum_var[var][site].copy(deep=True)
marker_styles = ['o', 's', '^', 'D', 'x', '*']
for p1 in df_params[params_list[0]].unique():
    for p2 in df_params[params_list[1]].unique():
        plt.figure(figsize=(8, 6))
        for idx_p3, p3 in enumerate(df_params[params_list[2]].unique()):
            # The single member with this exact parameter triple.
            subset = df_params[(df_params[params_list[0]] == p1) &
                               (df_params[params_list[1]] == p2) &
                               (df_params[params_list[2]] == p3)]
            plt.plot(dict_vars_data[var][site][subset.index[0]],
                     marker=marker_styles[idx_p3],
                     label=f'{(p1,p2,p3)}', alpha=0.5)
        # NOTE(review): the title text says "Param1" but interpolates p3 (the
        # last vmax_p value of the inner loop) -- kept as-is to preserve output.
        plt.title(f'Output Variable vs. Parameter1 (Param1={p3})')
        plt.xlabel('Index')
        plt.ylabel('Output Variable')
        plt.legend()
        plt.grid(True)
        plt.show()
['fates_cnp_vmax_nh4', 'fates_cnp_vmax_no3', 'fates_cnp_vmax_p']
# Sum_<var> as a function of vmax_p, one figure per (vmax_nh4, vmax_no3)
# pair, on log-log axes.
# FIX: the original re-plotted the IDENTICAL subset six times inside an
# inner p3 loop (the subset only filters on p1 and p2), titled the figure
# with the leftover p3 value, and passed the literal string
# 'params_list[2]' to plt.xlabel.  Plot once per (p1, p2) and label the
# axes/title with the actual parameter names and values.
print(params_list)
df = dict_param_sum_var[var][site].copy(deep=True)
for p1 in df_params[params_list[0]].unique():
    for p2 in df_params[params_list[1]].unique():
        plt.figure(figsize=(8, 6))
        # All members sharing this (p1, p2); they differ only in vmax_p.
        subset = df[(df_params[params_list[0]] == p1) &
                    (df_params[params_list[1]] == p2)]
        plt.plot(subset[params_list[2]], subset[f"Sum_{var}"],
                 label=f'{(p1,p2)}', alpha=0.5)
        plt.legend()
        plt.yscale('log')
        plt.xscale('log')
        plt.title(f'Sum_{var} vs. {params_list[2]} '
                  f'({params_list[0]}={p1}, {params_list[1]}={p2})')
        plt.xlabel(params_list[2])
['fates_cnp_vmax_nh4', 'fates_cnp_vmax_no3', 'fates_cnp_vmax_p']
/tmp/ipykernel_1479910/467480283.py:6: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`). Consider using `matplotlib.pyplot.close()`. plt.figure(figsize=(8, 6))
# Quick-look overlay: every ensemble member's raw time series per (var, site).
for var in variables:
    for site in sites:
        fig, ax = plt.subplots()
        for run_key in fnames:
            if site in run_key:
                series = dict_vars_data[var][site][run_key.split("_")[-1]]
                ax.plot(series, label=run_key.split("_")[-1])
        ax.set_title(f"{var} at {site}")
        plt.show()
# how to check which variable affects GPP the most?
# Inspect the distinct sampled values of the first parameter (vmax_nh4).
df_params[params_list[0]].unique()
array([1.e-11, 1.e-10, 1.e-09, 1.e-08, 1.e-07, 1.e-06])
# One figure per vmax_nh4 value (p1): overlay all 36 (p2, p3) member series.
# NOTE(review): reconstructed from a collapsed notebook cell -- the nesting
# of the title/axis calls is inferred; confirm against the original notebook.
print(params_list)
for p1 in df_params[params_list[0]].unique():
    plt.figure(figsize=(8, 6))
    for p2 in df_params[params_list[1]].unique():
        for p3 in df_params[params_list[2]].unique():
            subset = df_params[(df_params[params_list[0]] == p1) &
                               (df_params[params_list[1]] == p2) &
                               (df_params[params_list[2]] == p3)]
            plt.plot(dict_vars_data[var][site][subset.index[0]],
                     label=f'Param2={p2}, Param3={p3}', alpha=.2)
    plt.title(f'Output Variable vs. Parameter1 (Param1={p1})')
    plt.xlabel('Index')
    plt.ylabel('Output Variable')
    plt.legend()
    plt.grid(True)
    plt.show()
# Same per-(p1, p2) overlay as above, with an optional filter on the spread
# (max - min) of each member's time series.
# FIX: the original bound the name `range` to the series spread, shadowing
# the `range` builtin for the rest of the notebook session; renamed to
# `spread` and the series hoisted into a local to avoid repeated lookups.
print(params_list)
marker_styles = ['o', 's', '^', 'D', 'x', '*']
for p1 in df_params[params_list[0]].unique():
    for p2 in df_params[params_list[1]].unique():
        plt.figure(figsize=(8, 6))
        for idx_p3, p3 in enumerate(df_params[params_list[2]].unique()):
            subset = df_params[(df_params[params_list[0]] == p1) &
                               (df_params[params_list[1]] == p2) &
                               (df_params[params_list[2]] == p3)]
            series = dict_vars_data[var][site][subset.index[0]]
            spread = series.max() - series.min()
            # Spread filter kept switchable; currently everything is plotted.
            #if spread > 3e-08:
            if True:
                plt.plot(series, marker=marker_styles[idx_p3],
                         label=f'{(p1,p2,p3)}', alpha=0.5)
        plt.title(f'Output Variable vs. Parameter1 (Param1={p3})')
        plt.xlabel('Index')
        plt.ylim(0, 1e-7)
        plt.ylabel('Output Variable')
        plt.legend()
        plt.grid(True)
        plt.show()
['fates_cnp_vmax_nh4', 'fates_cnp_vmax_no3', 'fates_cnp_vmax_p']